{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pandas的时间序列处理 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 创建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-02-18   -0.543551\n",
      "2017-02-19   -0.759103\n",
      "2017-02-25    0.058956\n",
      "2017-02-26    0.275448\n",
      "2017-03-04   -0.957346\n",
      "2017-03-05   -1.143108\n",
      "dtype: float64\n",
      "<class 'pandas.tseries.index.DatetimeIndex'>\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# 指定index为datetime的list\n",
    "date_list = [datetime(2017, 2, 18), datetime(2017, 2, 19), \n",
    "             datetime(2017, 2, 25), datetime(2017, 2, 26), \n",
    "             datetime(2017, 3, 4), datetime(2017, 3, 5)]\n",
    "time_s = pd.Series(np.random.randn(6), index=date_list)\n",
    "print(time_s)\n",
    "print(type(time_s.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-02-18', '2017-02-25', '2017-03-04', '2017-03-11',\n",
      "               '2017-03-18'],\n",
      "              dtype='datetime64[ns]', freq='W-SAT')\n",
      "2017-02-18   -0.921937\n",
      "2017-02-25    0.722167\n",
      "2017-03-04   -0.171531\n",
      "2017-03-11   -1.104664\n",
      "2017-03-18    1.259994\n",
      "Freq: W-SAT, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# pd.date_range()\n",
    "dates = pd.date_range('2017-02-18', # 起始日期\n",
    "                      periods=5,    # 周期\n",
    "                      freq='W-SAT') # 频率\n",
    "print(dates)\n",
    "print(pd.Series(np.random.randn(5), index=dates))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.543550683904\n"
     ]
    }
   ],
   "source": [
    "# Access by position. Use .iloc so the integer is always read as a position;\n",
    "# plain time_s[0] on a DatetimeIndex is deprecated in recent pandas and will\n",
    "# be treated as a label lookup instead.\n",
    "print(time_s.iloc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.543550683904\n"
     ]
    }
   ],
   "source": [
    "# Access by label, using a datetime object as the key\n",
    "print(time_s.loc[datetime(2017, 2, 18)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.543550683904\n"
     ]
    }
   ],
   "source": [
    "# Access by label, using any date string that pandas can parse\n",
    "print(time_s.loc['2017/02/18'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-02-18   -0.543551\n",
      "2017-02-19   -0.759103\n",
      "2017-02-25    0.058956\n",
      "2017-02-26    0.275448\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Partial-string indexing: a year-month string selects every entry in that month\n",
    "print(time_s.loc['2017-2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-02-26    0.275448\n",
      "2017-03-04   -0.957346\n",
      "2017-03-05   -1.143108\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Slice by date: everything from 2017-02-26 onward (the start label is included)\n",
    "print(time_s.loc['2017-2-26':])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 过滤\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2017-02-25    0.058956\n",
       "2017-02-26    0.275448\n",
       "2017-03-04   -0.957346\n",
       "2017-03-05   -1.143108\n",
       "dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every entry dated before 2017-02-25; the cutoff date itself is kept\n",
    "time_s.truncate(before='2017-2-25', after=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2017-02-18   -0.543551\n",
       "2017-02-19   -0.759103\n",
       "2017-02-25    0.058956\n",
       "dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every entry dated after 2017-02-25; the cutoff date itself is kept\n",
    "time_s.truncate(before=None, after='2017-2-25')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成日期范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',\n",
      "               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',\n",
      "               '2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',\n",
      "               '2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',\n",
      "               '2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',\n",
      "               '2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',\n",
      "               '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',\n",
      "               '2017-03-18'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "source": [
    "# 传入开始、结束日期，默认生成的该时间段的时间点是按天计算的\n",
    "date_index = pd.date_range('2017/02/18', '2017/03/18')\n",
    "print(date_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',\n",
      "               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',\n",
      "               '2017-02-26', '2017-02-27'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "source": [
    "# When only one endpoint (start or end) is given, the number of periods is required too\n",
    "print(pd.date_range('2017/02/18', periods=10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',\n",
      "               '2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',\n",
      "               '2017-03-17', '2017-03-18'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "source": [
    "# Ten daily timestamps counted back from the end date\n",
    "print(pd.date_range(start=None, end='2017/03/18', periods=10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-02-18 12:13:14', '2017-02-19 12:13:14',\n",
      "               '2017-02-20 12:13:14', '2017-02-21 12:13:14',\n",
      "               '2017-02-22 12:13:14', '2017-02-23 12:13:14',\n",
      "               '2017-02-24 12:13:14', '2017-02-25 12:13:14',\n",
      "               '2017-02-26 12:13:14', '2017-02-27 12:13:14'],\n",
      "              dtype='datetime64[ns]', freq='D')\n",
      "DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',\n",
      "               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',\n",
      "               '2017-02-26', '2017-02-27'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "source": [
    "# Normalizing timestamps: normalize=True resets the time of day to midnight\n",
    "for normalize_flag in (False, True):\n",
    "    print(pd.date_range(start='2017/02/18 12:13:14', periods=10,\n",
    "                        normalize=normalize_flag))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 频率与偏移量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2017-02-18', '2017-02-20', '2017-02-22', '2017-02-24',\n",
      "               '2017-02-26', '2017-02-28', '2017-03-02', '2017-03-04',\n",
      "               '2017-03-06', '2017-03-08', '2017-03-10', '2017-03-12',\n",
      "               '2017-03-14', '2017-03-16', '2017-03-18'],\n",
      "              dtype='datetime64[ns]', freq='2D')\n"
     ]
    }
   ],
   "source": [
    "# One timestamp every two days between the two endpoints\n",
    "print(pd.date_range(start='2017/02/18', end='2017/03/18', freq='2D'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14 days 12:00:00\n",
      "DatetimeIndex(['2017-02-18 00:00:00', '2017-03-04 12:00:00'], dtype='datetime64[ns]', freq='348H')\n"
     ]
    }
   ],
   "source": [
    "# 偏移量通过加法连接\n",
    "sum_offset = pd.tseries.offsets.Week(2) + pd.tseries.offsets.Hour(12)\n",
    "print(sum_offset)\n",
    "\n",
    "print(pd.date_range('2017/02/18', '2017/03/18', freq=sum_offset))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 移动数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-02-18    0.400190\n",
      "2017-02-25    1.495394\n",
      "2017-03-04   -1.331107\n",
      "2017-03-11    2.943859\n",
      "2017-03-18    0.813070\n",
      "Freq: W-SAT, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "ts = pd.Series(np.random.randn(5), index=pd.date_range('20170218', periods=5, freq='W-SAT'))\n",
    "print(ts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-02-18         NaN\n",
      "2017-02-25    0.400190\n",
      "2017-03-04    1.495394\n",
      "2017-03-11   -1.331107\n",
      "2017-03-18    2.943859\n",
      "Freq: W-SAT, dtype: float64\n",
      "2017-02-18    1.495394\n",
      "2017-02-25   -1.331107\n",
      "2017-03-04    2.943859\n",
      "2017-03-11    0.813070\n",
      "2017-03-18         NaN\n",
      "Freq: W-SAT, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# shift(1) moves the values one period later, shift(-1) one period earlier;\n",
    "# the index stays fixed and the vacated slot becomes NaN\n",
    "for periods in (1, -1):\n",
    "    print(ts.shift(periods))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
